In [1]:
import pylearn2.utils
import pylearn2.config
import theano
import neukrill_net.dense_dataset
import neukrill_net.utils
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import holoviews as hl
%load_ext holoviews.ipython
import sklearn.metrics
This is the model without any constraints on the kernel or col norms. We want to see what value col_norms_mean settles to, so that we can set these constraints at 80% of that value.
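As a reminder of where these constraints go, here is a minimal sketch of the relevant pylearn2 layer arguments (the layer names, shapes and the 1.9 values below are placeholders, not the experiment's actual config): fully connected layers take max_col_norm, convolutional layers take max_kernel_norm.
from pylearn2.models.mlp import RectifiedLinear, ConvRectifiedLinear

# Placeholder values; the point is only where the constraint arguments live.
fc = RectifiedLinear(layer_name='h4', dim=1024, irange=0.05,
                     max_col_norm=1.9)       # cap on each column's L2 norm
conv = ConvRectifiedLinear(layer_name='h1', output_channels=48,
                           kernel_shape=[5, 5], pool_shape=[2, 2],
                           pool_stride=[2, 2], irange=0.05,
                           max_kernel_norm=1.9)  # cap on each kernel's L2 norm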
In [135]:
m = pylearn2.utils.serial.load(
"/disk/scratch/neuroglycerin/models/quicker_learning_1_fc_layer_experiment_no_norms_repeat_recent.pkl")
In [67]:
def make_curves(model, *args):
    # Overlay one Curve per monitor channel: examples seen vs. channel value.
    curves = None
    for c in args:
        channel = model.monitor.channels[c]
        # Capitalise the channel name for the group label.
        group = c[0].upper() + c[1:]
        if curves is None:
            curves = hl.Curve(zip(channel.example_record, channel.val_record), group=group)
        else:
            curves += hl.Curve(zip(channel.example_record, channel.val_record), group=group)
    return curves
Plot all the col_norms_mean channels in the fully connected and softmax layers.
In [68]:
means = [c for c in sorted(m.monitor.channels.keys()) if "mean" in c and "norm" in c and "col" in c]
make_curves(m,*means)
Out[68]:
Plot all kernel_norms_mean in convolutional layers.
In [69]:
means = [c for c in sorted(m.monitor.channels.keys()) if "mean" in c and "norm" in c and "kernel" in c]
make_curves(m,*means)
Out[69]:
We think that the first (and, in this model, the only) fully connected layer is indicative of the value we're after.
In [136]:
# Max, min and mean column norms for the h4 fully connected layer.
channel = m.monitor.channels["train_h4_col_norms_max"]
plt.plot(channel.epoch_record, channel.val_record)
channel = m.monitor.channels["train_h4_col_norms_min"]
plt.plot(channel.epoch_record, channel.val_record)
channel = m.monitor.channels["train_h4_col_norms_mean"]
plt.plot(channel.epoch_record, channel.val_record)
Out[136]:
An attempt to find the saturating value of the mean by fitting a parabola to it and taking its minimum over the first 200 epochs.
In [137]:
# Fit a quadratic to the mean col norm curve and take its minimum over
# the first 200 epochs as the saturating value.
z = np.polyfit(channel.epoch_record, channel.val_record, 2)
p = np.poly1d(z)
sat = min(p(range(200)))
print(sat)
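Equivalently, the extremum of the fitted parabola can be taken in closed form instead of scanning a grid of epochs; a quick sketch using the coefficients above (it matches the grid value whenever the vertex falls inside the first 200 epochs):
# For p(x) = a*x**2 + b*x + c the extremum sits at x = -b / (2*a).
a, b, c = z
print(p(-b / (2 * a)))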
In [138]:
sat * 0.8
Out[138]:
That's the 80% value we'll use for the constraint.
In [139]:
channel = m.monitor.channels["train_h1_kernel_norms_max"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_h1_kernel_norms_min"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_h1_kernel_norms_mean"]
plt.plot(channel.val_record)
Out[139]:
In [140]:
z = np.polyfit(channel.epoch_record, channel.val_record, 2)
p = np.poly1d(z)
sat = max(p(range(200)))
sat
Out[140]:
In [141]:
sat * 0.8
Out[141]:
80% of the saturating value of the layer 1 kernel_norms_mean.
In [142]:
channel = m.monitor.channels["train_h2_kernel_norms_max"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_h2_kernel_norms_min"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_h2_kernel_norms_mean"]
plt.plot(channel.val_record)
Out[142]:
In [143]:
z = np.polyfit(channel.epoch_record, channel.val_record, 2)
p = np.poly1d(z)
sat = max(p(range(200)))
sat
Out[143]:
In [144]:
sat * 0.8
Out[144]:
80% of the saturating value of the layer 2 kernel_norms_mean.
In [145]:
channel = m.monitor.channels["train_h3_kernel_norms_max"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_h3_kernel_norms_min"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_h3_kernel_norms_mean"]
plt.plot(channel.val_record)
Out[145]:
In [146]:
z = np.polyfit(channel.epoch_record, channel.val_record, 2)
p = np.poly1d(z)
sat = max(p(range(200)))
sat
Out[146]:
In [147]:
sat * 0.8
Out[147]:
80% of the saturating value of the layer 3 kernel_norms_mean.
Attempt to set the norm constraints with Gavin. It went wrong.
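For the record, a hypothetical sketch of what setting the constraints on a loaded model could look like (the targets dict below is made up, not the values actually used): pylearn2 layers store the constraints as plain attributes, which are enforced when the training updates are computed, so overwriting them before resuming training should take effect.
# Hypothetical sketch: overwrite the constraint attributes on a loaded model.
targets = {'h4': 1.9}  # made-up layer name -> 80% saturation value
for layer in m.layers:
    if layer.layer_name in targets:
        if hasattr(layer, 'max_kernel_norm'):
            layer.max_kernel_norm = targets[layer.layer_name]
        elif hasattr(layer, 'max_col_norm'):
            layer.max_col_norm = targets[layer.layer_name]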
In [186]:
m = pylearn2.utils.serial.load(
"/disk/scratch/neuroglycerin/models/experiment_setting_colnorms_recent.pkl")
In [117]:
import neukrill_net.plotting as pl
pl.monitor_channels(m, ["valid_y_y_1_nll"], x_axis = "epoch")
Out[117]:
In [113]:
%%opts HeatMap style(cmap='gray')
pl.model_weights(m)
Out[113]:
In [120]:
pl.monitor_channels(m, [c for c in m.monitor.channels if "norm" in c and "max" in c], x_axis = "epoch")
Out[120]:
In [119]:
reload(pl)
Out[119]:
Attempt to set the norm constraints with Matt. It looks like it hasn't broken yet!
In [184]:
m = pylearn2.utils.serial.load(
"/disk/scratch/neuroglycerin/models/experiment_setting_colnorms_recent.pkl")
In [151]:
pl.monitor_channels(m, ["valid_y_y_1_nll"], x_axis = "epoch")
Out[151]:
In [152]:
pl.monitor_channels(m, [c for c in m.monitor.channels if "norm" in c and "max" in c], x_axis = "epoch")
Out[152]:
In [134]:
channel = m.monitor.channels["train_h4_col_norms_max"]
plt.plot(channel.epoch_record, channel.val_record)
channel = m.monitor.channels["train_h4_col_norms_min"]
plt.plot(channel.epoch_record, channel.val_record)
channel = m.monitor.channels["train_h4_col_norms_mean"]
plt.plot(channel.epoch_record, channel.val_record)
Out[134]:
The same model with dropout set to 0.9.
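In pylearn2, dropout comes in through the Dropout cost rather than the layers themselves; a minimal sketch of a 0.9 include probability (assuming that's what "dropout set to 0.9" refers to here):
from pylearn2.costs.mlp.dropout import Dropout

# Keep each input unit with probability 0.9 and rescale to compensate.
cost = Dropout(default_input_include_prob=0.9,
               default_input_scale=1.0 / 0.9)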
In [183]:
m_drop = pylearn2.utils.serial.load(
"/disk/scratch/neuroglycerin/models/experiment_setting_colnorms_dropout_recent.pkl")
In [163]:
pl.monitor_channels(m_drop, ["valid_y_y_1_nll"], x_axis = "epoch") + pl.monitor_channels(m_drop, ["train_y_y_1_nll"], x_axis = "epoch")
Out[163]:
Compare how fast the NLLs of the original and the dropout models are decreasing.
In [185]:
pl.monitor_channels(m, ["valid_y_y_1_nll"], x_axis = "epoch") + pl.monitor_channels(m_drop, ["valid_y_y_1_nll"], x_axis = "epoch")
Out[185]:
The model with more augmentations and no dropout (include probability set to 1).
In [175]:
m_aug = pylearn2.utils.serial.load(
"/disk/scratch/neuroglycerin/models/experiment_setting_colnorms_aug_recent.pkl")
In [179]:
pl.monitor_channels(m_aug, ["valid_y_y_1_nll"], x_axis = "epoch") + pl.monitor_channels(m, ["valid_y_y_1_nll"], x_axis = "epoch")
Out[179]:
In [182]:
pl.monitor_channels(m_aug, [c for c in m_aug.monitor.channels if "norms_mean" in c], x_axis="epoch")
Out[182]: